#!/usr/local/bin/python
# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)
# Reads a text file given on standard input or named as first argument, and
# generates HTML 2.0 on standard output. Recognizes these constructions:
#
#     HTML element              pattern at the beginning of a line
#
#     section heading           (<number><period>)+<space>
#     numbered list element     <1-2 spaces>(<number><period>)+<space>
#     unnumbered list element   <0-2 spaces><hyphen or asterisk><space>
#     preformatted section      <leading spaces>
#
# Heading level is determined by the number of (<number><period>) segments.
# Blank lines force a separation of elements; if none of the above four
# types is indicated, a new paragraph begins. A line beginning with many
# spaces is interpreted as a continuation (instead of preformatted) after
# a list element. Headings are anchored; paragraphs starting with "Q." are
# emphasized, and those marked with "A." get their first sentence emphasized.
#
# Hyperlinks are created from references to:
#     URLs, explicitly marked using <URL:...>
#     other questions, of the form "question <number>(<period><number>)*"
#     sections, of the form "section <number>".
import sys, string, regex, regsub, regex_syntax
# AWK syntax: grouping parentheses and '|' are written without backslashes.
regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
# --------------------------------------------------------- regular expressions
# Line classifiers.  They are tried with .match(), which in the old regex
# module returns the length of the match (so "n > 0" means "matched").

# One or two leading spaces, then "1.", "1.2." etc.  The first space is
# mandatory: without it every section heading would match here first and the
# headingprog test in the main loop could never be reached.
orditemprog = regex.compile('  ?([1-9][0-9]*\\.)+ +')
# Zero to two leading spaces, then a hyphen or asterisk bullet.
itemprog = regex.compile(' ? ?[-*] +')
# Same numbering as orditemprog but starting in the first column.
headingprog = regex.compile('([1-9][0-9]*\\.)+ +')
# NOTE(review): runs of spaces appear to have been collapsed in this copy of
# the file; the header comment speaks of "many spaces", so this pattern was
# probably longer than a single space -- confirm against a pristine copy.
prefmtprog = regex.compile(' ')
blankprog = regex.compile('^[ \t\r\n]$')
questionprog = regex.compile(' *Q\\. +')
answerprog = regex.compile(' *A\\. +')
# First sentence of a paragraph: everything up to the first sentence
# punctuation that is followed by whitespace (or up to an opening parenthesis).
sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')
mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
                            '|Approved|Archive-Name|Version|Last-Modified): +',
                            regex.casefold)

# Reference patterns.  These are applied to the finished body, i.e. *after*
# '&', '<' and '>' in the text have been turned into entities, so the angle
# brackets around URLs and addresses must be matched as '&lt;' and '&gt;'
# (which is also why the character classes stop at '&').
urlprog = regex.compile('&lt;URL:([^&]+)&gt;')
addrprog = regex.compile('&lt;([^>@:]+@[^&@:]+)&gt;')
qrefprog = regex.compile('question +([1-9](\\.[0-9]+)*)')
srefprog = regex.compile('section +([1-9][0-9]*)')
entityprog = regex.compile('[&<>]')
# ------------------------------------------------------------ global variables
# Output fragments, appended in order and joined into one string at the end.
body = []
# Current nesting depth of ordered lists (raised by dnol, lowered by upol).
ollev = 0
# 1 while an unordered list is open, 0 otherwise.
ullev = 0
# Tag name of the element being accumulated ('' when there is none).
element = ''
# Text collected so far for that element.
content = ''
# Number prefix of the most recent numbered item or heading.
secnum = ''
# Text following a "Version: " header line, once one has been seen.
version = ''
# ----------------------------------------------------- for making nested lists
def dnol():
    # Go down one ordered-list level: bump ollev and open a nested list.
    # If the last fragment emitted is the '</li>' that closed the previous
    # item, that close tag is dropped so the new list nests inside the item.
    # NOTE(review): both tag literals had been reduced to '' in this copy of
    # the file; '</li>' and '<ol>' are reconstructions -- confirm against a
    # pristine copy.
    global body, ollev
    ollev = ollev + 1
    # body is still empty when the input starts with a numbered item, so
    # check before indexing.
    if body and body[-1] == '</li>':
        del body[-1]
    body.append('<ol>')
def upol():
    # Come up one ordered-list level: lower ollev and close the current list.
    # While still inside an outer list, the enclosing item is closed as well.
    # NOTE(review): the original string literals were lost (one was even split
    # across two lines, leaving the file unparsable); '</ol></li>' and '</ol>'
    # are reconstructions -- confirm against a pristine copy.
    global body, ollev
    ollev = ollev - 1
    if ollev:
        body.append('</ol></li>')
    else:
        body.append('</ol>')
# --------------------------------- output one element and convert its contents
def spew(clearol=0, clearul=0):
    # Emit the pending element (if any) to body, then optionally close lists.
    #
    #   clearol -- if true, close every open ordered-list level
    #   clearul -- if true, close the open unordered list, if there is one
    #
    # Before it is emitted, the pending text has '&', '<' and '>' replaced by
    # entities, and questions, headings and answers get their extra markup.
    # NOTE(review): the markup literals in this function had been stripped to
    # '' (and the entity replacements to no-ops) in this copy of the file.
    # They are reconstructed from the header comment (anchored headings,
    # emphasized questions/first answer sentence) -- confirm the exact tags
    # against a pristine copy.
    global content, body, ollev, ullev
    if content:
        if entityprog.search(content) > -1:
            # '&' must go first, or the entities produced below would be
            # escaped a second time.
            content = regsub.gsub('&', '&amp;', content)
            content = regsub.gsub('<', '&lt;', content)
            content = regsub.gsub('>', '&gt;', content)
        n = questionprog.match(content)
        if n > 0:
            content = '<em>' + content[n:] + '</em>'
            if ollev:                       # question reference in index
                fragid = regsub.gsub('^ +|\\.? +$', '', secnum)
                content = '<a href="#%s">%s</a>' % (fragid, content)
        if element[0] == 'h':               # heading in the main text
            # Fragment id is the section number without surrounding spaces
            # or its final period.
            fragid = regsub.gsub('^ +|\\.? +$', '', secnum)
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)
        n = answerprog.match(content)
        if n > 0:                           # answer paragraph
            content = regsub.sub(sentprog, '<em>\\1</em>', content[n:])
        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
    content = ''
    while clearol and ollev:
        upol()
    if clearul and ullev:
        body.append('</ul>')
        ullev = 0
# ---------------------------------------------------------------- main program
# Read the file named by the first command-line argument when one is given
# (and is not empty); otherwise read standard input.
if len(sys.argv) > 1 and sys.argv[1]:
    faq = open(sys.argv[1])
else:
    faq = sys.stdin
lines = faq.readlines()
# Classify each input line.  A line either starts a new element (after
# flushing the pending one with spew), extends the pending element, or -- for
# a blank line -- ends it.  The tests are ordered, so earlier patterns win.
for line in lines:
    # NOTE(review): the tag appended here was lost from this copy of the file
    # (the string literal was left split across two lines); '<hr>' is a
    # reconstruction -- confirm against a pristine copy.
    if line[2:9] == '=======':              # <hr> will appear *before*
        body.append('<hr>')                 # the underlined heading
        continue

    n = orditemprog.match(line)
    if n > 0:                               # make ordered list item
        spew(0, 'clear ul')
        secnum = line[:n]
        # One period per number segment, so the count is the nesting depth.
        level = string.count(secnum, '.')
        while level > ollev: dnol()
        while level < ollev: upol()
        element, content = 'li', line[n:]
        continue

    n = itemprog.match(line)
    if n > 0:                               # make unordered list item
        spew('clear ol', 0)
        if ullev == 0:
            body.append('<ul>')
            ullev = 1
        element, content = 'li', line[n:]
        continue

    n = headingprog.match(line)
    if n > 0:                               # make heading element
        spew('clear ol', 'clear ul')
        secnum = line[:n]
        sys.stderr.write(line)              # echo headings to stderr
        element, content = 'h%d' % string.count(secnum, '.'), line[n:]
        continue

    n = 0
    if not secnum:                          # haven't hit body yet
        n = mailhdrprog.match(line)
        # Only the first "Version: " line is recorded.
        v = version and -1 or regex.match('Version: ', line)
        if v > 0 and not version: version = line[v:]
    if n <= 0 and element != 'li':          # not pre if after a list item
        n = prefmtprog.match(line)
    if n > 0:                               # make preformatted element
        if element == 'pre':
            content = content + line
        else:
            spew('clear ol', 'clear ul')
            element, content = 'pre', line
        continue

    if blankprog.match(line) > 0:           # force a new element
        spew()
        element = ''
    elif element:                           # continue current element
        content = content + line
    else:                                   # no element; make paragraph
        spew('clear ol', 'clear ul')
        element, content = 'p', line
spew() # output last element
body = string.joinfields(body, '')
body = regsub.gsub(urlprog, '\\1', body)
body = regsub.gsub(addrprog, '\\1', body)
body = regsub.gsub(qrefprog, 'question \\1', body)
body = regsub.gsub(srefprog, 'section \\1', body)
print ''
print 'Python Frequently-Asked Questions v' + version
print "(This file was generated using Ping's"
print 'faq2html.py.)'
print body + ''